// Copyright 2001-2016 Crytek GmbH / Crytek Group. All rights reserved.

//
//	File:Cry_Math.h
//	Description: Misc mathematical functions
//
//	History:
//	-Jan 31,2001: Created by Marco Corbetta
//	-Jan 04,2003: SSE and 3DNow optimizations by Andrey Honich
//
//////////////////////////////////////////////////////////////////////

#ifndef CRY_SIMD_H
#define CRY_SIMD_H

#if _MSC_VER > 1000
	#pragma once
#endif

#include <CryCore/Platform/platform.h>

inline float AngleMod(float a)
{
	a = (float)((360.0 / 65536) * ((int)(a * (65536 / 360.0)) & 65535));
	return a;
}

inline float AngleModRad(float a)
{
	a = (float)((gf_PI2 / 65536) * ((int)(a * (65536 / gf_PI2)) & 65535));
	return a;
}
inline unsigned short Degr2Word(float f)
{
	return (unsigned short)(AngleMod(f) / 360.0f * 65536.0f);
}
inline float Word2Degr(unsigned short s)
{
	return (float)s / 65536.0f * 360.0f;
}

#if CRY_PLATFORM_X86
ILINE float __fastcall Ffabs(float f)
{
	*((unsigned*) &f) &= ~0x80000000;
	return (f);
}
#else
inline float Ffabs(float x) { return fabsf(x); }
#endif

#define mathMatrixRotationZ(pOut, angle)     (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationZ(angle)))
#define mathMatrixRotationY(pOut, angle)     (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationY(angle)))
#define mathMatrixRotationX(pOut, angle)     (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateRotationX(angle)))
#define mathMatrixTranslation(pOut, x, y, z) (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateTranslationMat(Vec3(x, y, z))))
#define mathMatrixScaling(pOut, sx, sy, sz)  (*(Matrix44*)pOut) = GetTransposed44(Matrix44(Matrix34::CreateScale(Vec3(sx, sy, sz))))

template<class T> inline void ExchangeVals(T& X, T& Y)
{
	const T Tmp = X;
	X = Y;
	Y = Tmp;
}

inline void mathMatrixPerspectiveFov(Matrix44A* pMatr, f32 fovY, f32 Aspect, f32 zn, f32 zf)
{
	f32 yScale = 1.0f / tan_tpl(fovY / 2.0f);
	f32 xScale = yScale / Aspect;

	f32 m22 = f32(f64(zf) / (f64(zn) - f64(zf)));
	f32 m32 = f32(f64(zn) * f64(zf) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = xScale;
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = yScale;
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = 0;
	(*pMatr)(2, 1) = 0;
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = -1.0f;
	(*pMatr)(3, 0) = 0;
	(*pMatr)(3, 1) = 0;
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 0;
}

inline void mathMatrixOrtho(Matrix44A* pMatr, f32 w, f32 h, f32 zn, f32 zf)
{
	f32 m22 = f32(1.0 / (f64(zn) - f64(zf)));
	f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = 2.0f / w;
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = 2.0f / h;
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = 0;
	(*pMatr)(2, 1) = 0;
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = 0;
	(*pMatr)(3, 0) = 0;
	(*pMatr)(3, 1) = 0;
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 1;

}

inline void mathMatrixOrthoOffCenter(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
	f32 m22 = f32(1.0 / (f64(zn) - f64(zf)));
	f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = 2.0f / (r - l);
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = 2.0f / (t - b);
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = 0;
	(*pMatr)(2, 1) = 0;
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = 0;
	(*pMatr)(3, 0) = (l + r) / (l - r);
	(*pMatr)(3, 1) = (t + b) / (b - t);
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 1.0f;
}

inline void mathMatrixOrthoOffCenterLH(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
	f32 m22 = f32(1.0 / (f64(zf) - f64(zn)));
	f32 m32 = f32(f64(zn) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = 2.0f / (r - l);
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = 2.0f / (t - b);
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = 0;
	(*pMatr)(2, 1) = 0;
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = 0;
	(*pMatr)(3, 0) = (l + r) / (l - r);
	(*pMatr)(3, 1) = (t + b) / (b - t);
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 1.0f;
}

inline void mathMatrixPerspectiveOffCenter(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
	f32 m22 = f32(f64(zf) / (f64(zn) - f64(zf)));
	f32 m32 = f32(f64(zn) * f64(zf) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = 2 * zn / (r - l);
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = 2 * zn / (t - b);
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = (l + r) / (r - l);
	(*pMatr)(2, 1) = (t + b) / (t - b);
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = -1;
	(*pMatr)(3, 0) = 0;
	(*pMatr)(3, 1) = 0;
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 0;
}

inline void mathMatrixPerspectiveOffCenterReverseDepth(Matrix44A* pMatr, f32 l, f32 r, f32 b, f32 t, f32 zn, f32 zf)
{
	f32 m22 = f32(-f64(zn) / (f64(zn) - f64(zf)));
	f32 m32 = f32(-f64(zn) * f64(zf) / (f64(zn) - f64(zf)));

	(*pMatr)(0, 0) = 2 * zn / (r - l);
	(*pMatr)(0, 1) = 0;
	(*pMatr)(0, 2) = 0;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = 0;
	(*pMatr)(1, 1) = 2 * zn / (t - b);
	(*pMatr)(1, 2) = 0;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = (l + r) / (r - l);
	(*pMatr)(2, 1) = (t + b) / (t - b);
	(*pMatr)(2, 2) = m22;
	(*pMatr)(2, 3) = -1;
	(*pMatr)(3, 0) = 0;
	(*pMatr)(3, 1) = 0;
	(*pMatr)(3, 2) = m32;
	(*pMatr)(3, 3) = 0;
}

inline void mathMatrixLookAt(Matrix44A* pMatr, const Vec3& Eye, const Vec3& At, const Vec3& Up)
{
	Vec3 vLightDir = (Eye - At);
	Vec3 zaxis = vLightDir.GetNormalized();
	Vec3 xaxis = (Up.Cross(zaxis)).GetNormalized();
	Vec3 yaxis = zaxis.Cross(xaxis);

	(*pMatr)(0, 0) = xaxis.x;
	(*pMatr)(0, 1) = yaxis.x;
	(*pMatr)(0, 2) = zaxis.x;
	(*pMatr)(0, 3) = 0;
	(*pMatr)(1, 0) = xaxis.y;
	(*pMatr)(1, 1) = yaxis.y;
	(*pMatr)(1, 2) = zaxis.y;
	(*pMatr)(1, 3) = 0;
	(*pMatr)(2, 0) = xaxis.z;
	(*pMatr)(2, 1) = yaxis.z;
	(*pMatr)(2, 2) = zaxis.z;
	(*pMatr)(2, 3) = 0;
	(*pMatr)(3, 0) = -xaxis.Dot(Eye);
	(*pMatr)(3, 1) = -yaxis.Dot(Eye);
	(*pMatr)(3, 2) = -zaxis.Dot(Eye);
	(*pMatr)(3, 3) = 1;
}

inline bool mathMatrixPerspectiveFovInverse(Matrix44_tpl<f64>* pResult, const Matrix44A* pProjFov)
{
	if ((*pProjFov)(0, 1) == 0.0f && (*pProjFov)(0, 2) == 0.0f && (*pProjFov)(0, 3) == 0.0f &&
	    (*pProjFov)(1, 0) == 0.0f && (*pProjFov)(1, 2) == 0.0f && (*pProjFov)(1, 3) == 0.0f &&
	    (*pProjFov)(3, 0) == 0.0f && (*pProjFov)(3, 1) == 0.0f && (*pProjFov)(3, 2) != 0.0f)
	{
		(*pResult)(0, 0) = 1.0 / (*pProjFov).m00;
		(*pResult)(0, 1) = 0;
		(*pResult)(0, 2) = 0;
		(*pResult)(0, 3) = 0;
		(*pResult)(1, 0) = 0;
		(*pResult)(1, 1) = 1.0 / (*pProjFov).m11;
		(*pResult)(1, 2) = 0;
		(*pResult)(1, 3) = 0;
		(*pResult)(2, 0) = 0;
		(*pResult)(2, 1) = 0;
		(*pResult)(2, 2) = 0;
		(*pResult)(2, 3) = 1.0 / (*pProjFov).m32;
		(*pResult)(3, 0) = (*pProjFov).m20 / (*pProjFov).m00;
		(*pResult)(3, 1) = (*pProjFov).m21 / (*pProjFov).m11;
		(*pResult)(3, 2) = -1;
		(*pResult)(3, 3) = (*pProjFov).m22 / (*pProjFov).m32;

		return true;
	}

	return false;
}

template<class T_out, class T_in> inline void mathMatrixLookAtInverse(Matrix44_tpl<T_out>* pResult, const Matrix44_tpl<T_in>* pLookAt)
{
	(*pResult)(0, 0) = (*pLookAt).m00;
	(*pResult)(0, 1) = (*pLookAt).m10;
	(*pResult)(0, 2) = (*pLookAt).m20;
	(*pResult)(0, 3) = (*pLookAt).m03;
	(*pResult)(1, 0) = (*pLookAt).m01;
	(*pResult)(1, 1) = (*pLookAt).m11;
	(*pResult)(1, 2) = (*pLookAt).m21;
	(*pResult)(1, 3) = (*pLookAt).m13;
	(*pResult)(2, 0) = (*pLookAt).m02;
	(*pResult)(2, 1) = (*pLookAt).m12;
	(*pResult)(2, 2) = (*pLookAt).m22;
	(*pResult)(2, 3) = (*pLookAt).m23;

	(*pResult)(3, 0) = T_out(-(f64((*pLookAt).m00) * f64((*pLookAt).m30) + f64((*pLookAt).m01) * f64((*pLookAt).m31) + f64((*pLookAt).m02) * f64((*pLookAt).m32)));
	(*pResult)(3, 1) = T_out(-(f64((*pLookAt).m10) * f64((*pLookAt).m30) + f64((*pLookAt).m11) * f64((*pLookAt).m31) + f64((*pLookAt).m12) * f64((*pLookAt).m32)));
	(*pResult)(3, 2) = T_out(-(f64((*pLookAt).m20) * f64((*pLookAt).m30) + f64((*pLookAt).m21) * f64((*pLookAt).m31) + f64((*pLookAt).m22) * f64((*pLookAt).m32)));
	(*pResult)(3, 3) = (*pLookAt).m33;
};

inline void mathVec4Transform(f32 out[4], const f32 m[16], const f32 in[4])
{
#define M(row, col) m[col * 4 + row]
	out[0] = M(0, 0) * in[0] + M(0, 1) * in[1] + M(0, 2) * in[2] + M(0, 3) * in[3];
	out[1] = M(1, 0) * in[0] + M(1, 1) * in[1] + M(1, 2) * in[2] + M(1, 3) * in[3];
	out[2] = M(2, 0) * in[0] + M(2, 1) * in[1] + M(2, 2) * in[2] + M(2, 3) * in[3];
	out[3] = M(3, 0) * in[0] + M(3, 1) * in[1] + M(3, 2) * in[2] + M(3, 3) * in[3];
#undef M
}

// Fix: replace by 3x4 Matrix transformation and move to crymath.
inline void mathVec3Transform(f32 out[4], const f32 m[16], const f32 in[3])
{
#define M(row, col) m[col * 4 + row]
	out[0] = M(0, 0) * in[0] + M(0, 1) * in[1] + M(0, 2) * in[2] + M(0, 3) * 1.0f;
	out[1] = M(1, 0) * in[0] + M(1, 1) * in[1] + M(1, 2) * in[2] + M(1, 3) * 1.0f;
	out[2] = M(2, 0) * in[0] + M(2, 1) * in[1] + M(2, 2) * in[2] + M(2, 3) * 1.0f;
	out[3] = M(3, 0) * in[0] + M(3, 1) * in[1] + M(3, 2) * in[2] + M(3, 3) * 1.0f;
#undef M
}

#define mathVec3TransformF(pOut, pV, pM) mathVec3Transform((f32*)pOut, (const f32*)pM, (f32*)pV)
#define mathVec4TransformF(pOut, pV, pM) mathVec4Transform((f32*)pOut, (const f32*)pM, (f32*)pV)
#define mathVec3NormalizeF(pOut, pV)     (*(Vec3*)pOut) = (((Vec3*)pV)->GetNormalizedSafe())
#define mathVec2NormalizeF(pOut, pV)     (*(Vec2*)pOut) = (((Vec2*)pV)->GetNormalizedSafe())

// Fix replace viewport by int16 array.
// Fix for d3d viewport.
inline f32 mathVec3Project(Vec3* pvWin, const Vec3* pvObj, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld)
{
	Vec4 in, out;

	in.x = pvObj->x;
	in.y = pvObj->y;
	in.z = pvObj->z;
	in.w = 1.0f;
	mathVec4Transform((f32*)&out, (f32*)pWorld, (f32*)&in);
	mathVec4Transform((f32*)&in, (f32*)pView, (f32*)&out);
	mathVec4Transform((f32*)&out, (f32*)pProjection, (f32*)&in);

	if (out.w == 0.0f)
		return 0.f;

	out.x /= out.w;
	out.y /= out.w;
	out.z /= out.w;

	//output coords
	pvWin->x = pViewport[0] + (1 + out.x) * pViewport[2] / 2;
	pvWin->y = pViewport[1] + (1 - out.y) * pViewport[3] / 2;  //flip coords for y axis

	//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
	float fViewportMinZ = 0, fViewportMaxZ = 1.0f;

	pvWin->z = fViewportMinZ + out.z * (fViewportMaxZ - fViewportMinZ);

	return out.w;
}

inline Vec3* mathVec3UnProject(Vec3* pvObj, const Vec3* pvWin, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld, int32 OptFlags)
{
	Matrix44A m, mA;
	Vec4 in, out;

	//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
	float fViewportMinZ = 0, fViewportMaxZ = 1.0f;

	in.x = (pvWin->x - pViewport[0]) * 2 / pViewport[2] - 1.0f;
	in.y = 1.0f - ((pvWin->y - pViewport[1]) * 2 / pViewport[3]);   //flip coords for y axis
	in.z = (pvWin->z - fViewportMinZ) / (fViewportMaxZ - fViewportMinZ);
	in.w = 1.0f;

	//prepare inverse projection matrix
	mA = ((*pWorld) * (*pView)) * (*pProjection);
	m = mA.GetInverted();

	mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);
	if (out.w == 0.0f)
		return NULL;

	pvObj->x = out.x / out.w;
	pvObj->y = out.y / out.w;
	pvObj->z = out.z / out.w;

	return pvObj;
}

inline Vec3* mathVec3ProjectArray(Vec3* pOut, uint32 OutStride, const Vec3* pV, uint32 VStride, const int32 pViewport[4], const Matrix44A* pProjection, const Matrix44A* pView, const Matrix44A* pWorld, uint32 n, int32)
{
	Matrix44A m;
	Vec4 in, out;

	int8* pOutT = (int8*)pOut;
	int8* pInT = (int8*)pV;

	Vec3* pvWin;
	Vec3* pvObj;

	//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
	float fViewportMinZ = 0, fViewportMaxZ = 1.0f;

	m = ((*pWorld) * (*pView)) * (*pProjection);

	for (uint32 i = 0; i < n; i++)
	{

		pvObj = (Vec3*)pInT;
		pvWin = (Vec3*)pOutT;

		in.x = pvObj->x;
		in.y = pvObj->y;
		in.z = pvObj->z;
		in.w = 1.0f;

		mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);

		if (out.w == 0.0f)
			return NULL;

		float fInvW = 1.0f / out.w;
		out.x *= fInvW;
		out.y *= fInvW;
		out.z *= fInvW;

		//output coords
		pvWin->x = pViewport[0] + (1 + out.x) * pViewport[2] / 2;
		pvWin->y = pViewport[1] + (1 - out.y) * pViewport[3] / 2;  //flip coords for y axis

		pvWin->z = fViewportMinZ + out.z * (fViewportMaxZ - fViewportMinZ);

		pOutT += OutStride;
		pInT += VStride;
	}

	return pOut;
}

inline Vec3* mathVec3UnprojectArray(Vec3* pOut, uint32 OutStride, const Vec3* pV, uint32 VStride, const int32 pViewport[4], const Matrix44* pProjection, const Matrix44* pView, const Matrix44* pWorld, uint32 n, int32 OptFlags)
{
	Vec4 in, out;
	Matrix44 m, mA;

	int8* pOutT = (int8*)pOut;
	int8* pInT = (int8*)pV;

	Vec3* pvWin;
	Vec3* pvObj;

	//FIX: update fViewportMinZ fViewportMaxZ support for Viewport everywhere
	float fViewportMinZ = 0, fViewportMaxZ = 1.0f;

	mA = ((*pWorld) * (*pView)) * (*pProjection);
	m = mA.GetInverted();

	for (uint32 i = 0; i < n; i++)
	{
		pvWin = (Vec3*)pInT;
		pvObj = (Vec3*)pOutT;

		in.x = (pvWin->x - pViewport[0]) * 2 / pViewport[2] - 1.0f;
		in.y = 1.0f - ((pvWin->y - pViewport[1]) * 2 / pViewport[3]);   //flip coords for y axis
		in.z = (pvWin->z - fViewportMinZ) / (fViewportMaxZ - fViewportMinZ);
		in.w = 1.0f;

		mathVec4Transform((f32*)&out, m.GetData(), (f32*)&in);

		CRY_MATH_ASSERT(out.w != 0.0f);

		if (out.w == 0.0f)
			return NULL;

		pvObj->x = out.x / out.w;
		pvObj->y = out.y / out.w;
		pvObj->z = out.z / out.w;

		pOutT += OutStride;
		pInT += VStride;
	}

	return pOut;
}

/*****************************************************
   MISC FUNCTIONS
*****************************************************/

#if CRY_PLATFORM_X86
inline int fastftol_positive(float f)
{
	int i;

	f -= 0.5f;
	#if defined(_MSC_VER)
	__asm fld[f]
	__asm fistp[i]
	#elif defined(__GNUC__)
	__asm__("fld %[f]\n fistpl %[i]" :[i] "+m" (i) :[f] "m" (f));
	#else
		#error
	#endif
	return i;
}
#else
inline int fastftol_positive(float f)
{
	CRY_MATH_ASSERT(f >= 0.f);
	return (int)floorf(f);
}
#endif

#if CRY_PLATFORM_X86
inline int fastround_positive(float f)
{
	int i;
	CRY_MATH_ASSERT(f >= 0.f);
	#if defined(_MSC_VER)
	__asm fld[f]
	__asm fistp[i]
	#elif defined(__GNUC__)
	__asm__("fld %[f]\n fistpl %[i]" :[i] "+m" (i) :[f] "m" (f));
	#else
		#error
	#endif
	return i;
}
#else
inline int fastround_positive(float f)
{
	CRY_MATH_ASSERT(f >= 0.f);
	return (int) (f + 0.5f);
}
#endif

#if CRY_PLATFORM_X86
ILINE int __fastcall FtoI(float x)
{
	int t;
	#if defined(_MSC_VER)
	__asm
	{
		fld x
		fistp t
	}
	#elif defined(__GNUC__)
	__asm__("fld %[x]\n fistpl %[t]" :[t] "+m" (t) :[x] "m" (x));
	#else
		#error
	#endif
	return t;
}
#else
inline int FtoI(float x) { return (int)x; }
#endif

#endif //math
